PCA Plots

  • I have 19,217 genes (rows) and 12 samples (columns) here.
##  [1] "WP3_D12_1"    "WP3_D12_2"    "WP3_D16_1"    "WP3_D16_2"    "PKD_10_D12_1"
##  [6] "PKD_10_D12_2" "PKD_10_D16_1" "PKD_10_D16_2" "PKD_11_D12_1" "PKD_11_D12_2"
## [11] "PKD_11_D16_1" "PKD_11_D16_2"
## # A tibble: 12 × 4
##    sample_id    group  day   rep  
##    <chr>        <chr>  <chr> <chr>
##  1 WP3_D12_1    WP3    D12   1    
##  2 WP3_D12_2    WP3    D12   2    
##  3 WP3_D16_1    WP3    D16   1    
##  4 WP3_D16_2    WP3    D16   2    
##  5 PKD_10_D12_1 PKD_10 D12   1    
##  6 PKD_10_D12_2 PKD_10 D12   2    
##  7 PKD_10_D16_1 PKD_10 D16   1    
##  8 PKD_10_D16_2 PKD_10 D16   2    
##  9 PKD_11_D12_1 PKD_11 D12   1    
## 10 PKD_11_D12_2 PKD_11 D12   2    
## 11 PKD_11_D16_1 PKD_11 D16   1    
## 12 PKD_11_D16_2 PKD_11 D16   2
## # A tibble: 12 × 5
##    sample_id    group  day   rep   condition
##    <chr>        <chr>  <chr> <chr> <chr>    
##  1 WP3_D12_1    WP3    D12   1     WT       
##  2 WP3_D12_2    WP3    D12   2     WT       
##  3 WP3_D16_1    WP3    D16   1     WT       
##  4 WP3_D16_2    WP3    D16   2     WT       
##  5 PKD_10_D12_1 PKD_10 D12   1     KO       
##  6 PKD_10_D12_2 PKD_10 D12   2     KO       
##  7 PKD_10_D16_1 PKD_10 D16   1     KO       
##  8 PKD_10_D16_2 PKD_10 D16   2     KO       
##  9 PKD_11_D12_1 PKD_11 D12   1     KO       
## 10 PKD_11_D12_2 PKD_11 D12   2     KO       
## 11 PKD_11_D16_1 PKD_11 D16   1     KO       
## 12 PKD_11_D16_2 PKD_11 D16   2     KO
## 
## PKD_10 PKD_11    WP3 
##      4      4      4
var_explained <- (pca$sdev^2 / sum(pca$sdev^2)) * 100
x_lab <- paste0("PC1 (", round(var_explained[1], 1), "%)")
y_lab <- paste0("PC2 (", round(var_explained[2], 1), "%)")


pca_df$group <- factor(pca_df$group,
                       levels = c("WP3", "PKD_10", "PKD_11"))  # WP3 = WT

ggplot(pca_df, aes(x = PC1, y = PC2, color = group, shape = day)) +
    geom_point(size = 7, alpha = 0.75) +
    scale_color_manual(
    values = c(
      "WP3"     = "#E0BA3E",  # WT first
      "PKD_10"  = "#529DDA", 
      "PKD_11"  = "#7570b3"
    ),
    name = "Experimental Group",
    labels = c("WT", "PKD10", "PKD11"),
    guide = guide_legend(order = 1)
  )+
    scale_shape_manual(
    values = c("D12" = 16, "D16" = 17),
    name = "Timepoint",
    labels = c("Day 12", "Day 16"),
    guide = guide_legend(order = 2)
  ) +
  theme_minimal(base_size = 14, base_family = "Helvetica Neue Light")+
  theme(
    legend.position = "right",          # move legend below plot
    legend.box = "vertical",           # horizontal layout
    plot.title = element_text(size = 16, hjust = 0.5, face = "bold"),  # center title
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 11),
    legend.title = element_text(size = 11),
    legend.text = element_text(size = 10),
    
    panel.grid.major = element_line(color = "grey80", linewidth = 0.2),
    panel.grid.minor = element_line(color = "grey90", linewidth = 0.2),
  ) 

 #labs(title = "PCA of RNA-seq samples", x = x_lab, y = y_lab)
var_explained <- (pca$sdev^2 / sum(pca$sdev^2)) * 100
x_lab <- paste0("PC1 (", round(var_explained[1], 1), "%)")
y_lab <- paste0("PC2 (", round(var_explained[2], 1), "%)")


pca_df$condition <- factor(pca_df$condition,
                           levels = c("WT", "KO"))

ggplot(pca_df, aes(x = PC1, y = PC2, color = condition, shape = day)) +
  geom_point(size = 7, alpha = 0.75) +
  scale_color_manual(
    values = c( "WT" = "#E0BA3E",
                "KO" = "#529DDA"),
    name = "Condition",
    labels = c("WT", "PKD")
  ) +
  scale_shape_manual(
    values = c("D12" = 16, "D16" = 17),
    name = "Timepoint",
    labels = c("Day 12", "Day 16")
  ) +
  guides(
    color = guide_legend(order = 1),
    shape = guide_legend(order = 2)
  ) +
  theme_minimal(base_size = 14, base_family = "Helvetica Neue Light") +
  theme(
    legend.position = "right",
    legend.box = "vertical",
    plot.title = element_text(size = 16, hjust = 0.5, face = "bold"),
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 11),
    legend.title = element_text(size = 11),
    legend.text = element_text(size = 10),
    panel.grid.major = element_line(color = "grey80", linewidth = 0.2),
    panel.grid.minor = element_line(color = "grey90", linewidth = 0.2)
  ) 

   #labs(title = "PCA of RNA-seq samples", x = x_lab, y = y_lab)
pc3_lab <- paste0("PC3 (", round(var_explained[3], 1), "%)")

ggplot(pca_df, aes(x = PC1, y = PC2,
                   color = condition,
                   shape = day,
                   size = PC3)) +
  geom_point(alpha = 0.75) +
  scale_color_manual(
    values = c("WT" = "#E0BA3E", "KO" = "#529DDA"),
    name = "Condition",
    labels = c("WT", "PKD")
  ) +
  scale_shape_manual(
    values = c("D12" = 16, "D16" = 17),
    name = "Timepoint",
    labels = c("Day 12", "Day 16")
  ) +
  scale_size_continuous(name = pc3_lab) +  # PC3 % here
  guides(
    color = guide_legend(order = 1),
    shape = guide_legend(order = 2),
    size  = guide_legend(order = 3)
  ) +
  theme_minimal(base_size = 14, base_family = "Helvetica Neue Light") +
  theme(
    legend.position = "right",
    legend.box = "vertical",
    plot.title = element_text(size = 16, hjust = 0.5, face = "bold"),
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 11),
    legend.title = element_text(size = 11),
    legend.text = element_text(size = 10),
    panel.grid.major = element_line(color = "grey80", linewidth = 0.2),
    panel.grid.minor = element_line(color = "grey90", linewidth = 0.2)
  ) +
  labs(
    title = "PCA of RNA-seq samples",
    x = x_lab,
    y = y_lab
  )

var_explained <- (pca$sdev^2 / sum(pca$sdev^2)) * 100

x_lab <- paste0("PC1 (", round(var_explained[1], 1), "%)")
y_lab <- paste0("PC2 (", round(var_explained[2], 1), "%)")
z_lab <- paste0("PC3 (", round(var_explained[3], 1), "%)")
scatterplot3d(
  x = pca_df$PC1,
  y = pca_df$PC2,
  z = pca_df$PC3,
  color = ifelse(pca_df$condition == "WT", "#E0BA3E", "#529DDA"),
  pch = ifelse(pca_df$day == "D12", 16, 17),
  xlab = x_lab,
  ylab = y_lab,
  zlab = z_lab,
  main = "3D PCA of RNA-seq samples"
)

DEG

Volcano Plots

## Upregulated genes: 1006
## Downregulated genes: 1363
## Not significant: 16848

## Upregulated genes: 1398
## Downregulated genes: 989
## Not significant: 16830

Heat plots

sig_deg <- deg_results %>%
  dplyr::filter(adj.P.Val < 0.05 & abs(logFC) > 1)
top_genes <- rownames(sig_deg[order(sig_deg$adj.P.Val), ])[1:50]
mat_top <- log_mat[top_genes, ]
annotation_col <- data.frame(
  Condition = sample_df$condition,
  Day = sample_df$day
)
rownames(annotation_col) <- sample_df$sample_id
ann_colors <- list(
  Condition = c(Healthy = "#E0BA3E", PKD = "#529DDA"),
  Day = c(D12 = "#b3b3b3", D16 = "#4d4d4d")
)
my_colors <- colorRampPalette(c("blue", "white", "red"))(100)
mat_scaled <- t(scale(t(mat_top)))
library(pheatmap)

pheatmap(
  mat_scaled,
  color = my_colors,
  cluster_rows = TRUE,
  cluster_cols = TRUE,
  annotation_col = annotation_col,
  annotation_colors = ann_colors,
  fontsize = 10,
  fontsize_row = 8,
  border_color = NA,
  scale = "none",
  main = "Top 50 Differentially Expressed Genes (PKD vs Healthy)",
  clustering_method = "ward.D2",
  show_rownames = TRUE,
  show_colnames = FALSE,
  labels_row = rownames(mat_scaled),
  labels_col = colnames(mat_scaled),
  fontfamily = "Helvetica Neue Light"
)

# --- Gene list for Day 12 volcano highlights ---
genes_D12 <- c(
  "CHCHD2", "MKI67", "CDC25C", "CDC20", "IL17C", "COL22A1",
  "CYP2C9", "CCL4", "COL12A1", "CBR1"
)

# --- Normalize case to ensure matching ---
rownames(log_mat) <- toupper(rownames(log_mat))
genes_upper <- toupper(genes_D12)

# --- Subset only genes that exist ---
genes_found <- genes_upper[genes_upper %in% rownames(log_mat)]
cat("✅ Genes found:", paste(genes_found, collapse = ", "), "\n")
## ✅ Genes found: CHCHD2, MKI67, CDC25C, CDC20, IL17C, COL22A1, CYP2C9, CCL4, COL12A1, CBR1
# --- Subset metadata to only Day 12 samples ---
sample_df_d12 <- sample_df %>% dplyr::filter(day == "D12")
sample_ids_d12 <- sample_df_d12$sample_id

# --- Subset expression matrix to Day 12 samples and selected genes ---
mat_subset_d12 <- log_mat[genes_found, sample_ids_d12, drop = FALSE]

# --- Scale by gene (z-score) ---
mat_scaled_d12 <- t(scale(t(mat_subset_d12)))

# --- Build annotation table for columns (samples) ---
annotation_col <- data.frame(
  Condition = sample_df_d12$condition
)
rownames(annotation_col) <- sample_df_d12$sample_id

# --- Define colors ---
ann_colors <- list(
  Condition = c(Healthy = "#E0BA3E", PKD = "#529DDA")
)

# --- Draw the heatmap ---
p_d12 <- pheatmap(
  mat_scaled_d12,
  color = colorRampPalette(c("blue", "white", "red"))(100),
  cluster_rows = TRUE,
  cluster_cols = TRUE,
  annotation_col = annotation_col,
  annotation_colors = ann_colors,
  fontsize_row = 10,
  fontsize_col = 10,
  border_color = NA,
  scale = "none",
  main = "Day 12: PKD vs Healthy",
  clustering_method = "ward.D2",
  show_rownames = TRUE,
  show_colnames = FALSE,
  labels_row = rownames(mat_scaled_d12),
  labels_col = colnames(mat_scaled_d12),
  fontfamily = "Helvetica Neue Light"
)

# --- Gene list for Day 16 volcano highlights ---
genes_D16 <- c(
  "CHCHD2", "GJA1", "KCNE3", "SLC26A9", "KCNJ2", "CXCL14", "AQP2", "CCL28",
  "COL12A1", "CBR1", "KCNH2", "SLC12A3", "COL1A2", "COL22A1", "NID2", "CCL4"
)

# --- Normalize case to ensure matching ---
rownames(log_mat) <- toupper(rownames(log_mat))
genes_upper <- toupper(genes_D16)

# --- Subset only genes that exist ---
genes_found <- genes_upper[genes_upper %in% rownames(log_mat)]
cat("✅ Genes found:", paste(genes_found, collapse = ", "), "\n")
## ✅ Genes found: CHCHD2, GJA1, KCNE3, SLC26A9, KCNJ2, CXCL14, AQP2, CCL28, COL12A1, CBR1, KCNH2, SLC12A3, COL1A2, COL22A1, NID2, CCL4
# --- Subset metadata to only Day 16 samples ---
sample_df_d16 <- sample_df %>% dplyr::filter(day == "D16")
sample_ids_d16 <- sample_df_d16$sample_id

# --- Subset expression matrix to Day 16 samples and selected genes ---
mat_subset_d16 <- log_mat[genes_found, sample_ids_d16, drop = FALSE]

# --- Scale by gene (z-score) ---
mat_scaled_d16 <- t(scale(t(mat_subset_d16)))

# --- Build annotation table for columns (samples) ---
annotation_col <- data.frame(
  Condition = sample_df_d16$condition
)
rownames(annotation_col) <- sample_df_d16$sample_id

# --- Define colors ---
ann_colors <- list(
  Condition = c(Healthy = "#E0BA3E", PKD = "#529DDA")
)

# --- Draw the heatmap ---
pheatmap(
  mat_scaled_d16,
  color = colorRampPalette(c("blue", "white", "red"))(100),
  cluster_rows = TRUE,
  cluster_cols = TRUE,
  annotation_col = annotation_col,
  annotation_colors = ann_colors,
  fontsize_row = 10,
  fontsize_col = 10,
  border_color = NA,
  scale = "none",
  main = "Day 16: PKD vs Healthy",
  clustering_method = "ward.D2",
  show_rownames = TRUE,
  show_colnames = FALSE,
  labels_row = rownames(mat_scaled_d16),
  labels_col = colnames(mat_scaled_d16),
  fontfamily = "Helvetica Neue Light"
)

# Simplify condition (merge PKD_10 and PKD_11 into PKD)
sample_df <- sample_df %>%
  mutate(
    condition = case_when(
      group == "WP3" ~ "Healthy",
      group %in% c("PKD_10", "PKD_11") ~ "PKD"
    ),
    condition_day = paste0(condition, "_", day)
  )

# Define the order you want to appear in the heatmap
sample_df <- sample_df %>%
  mutate(condition_day = factor(condition_day,
                                levels = c("Healthy_D12", "Healthy_D16",
                                           "PKD_D12", "PKD_D16")))

# Now reorder your metadata according to condition_day and replicate within day
sample_df <- sample_df %>%
  arrange(condition_day, condition, day, rep)

# Reorder your scaled expression matrix columns to match sample_df order
mat_scaled <- mat_scaled[, sample_df$sample_id, drop = FALSE]

# Confirm order
colnames(mat_scaled)
##  [1] "WP3_D12_1"    "WP3_D12_2"    "WP3_D16_1"    "WP3_D16_2"    "PKD_10_D12_1"
##  [6] "PKD_11_D12_1" "PKD_10_D12_2" "PKD_11_D12_2" "PKD_10_D16_1" "PKD_11_D16_1"
## [11] "PKD_10_D16_2" "PKD_11_D16_2"
annotation_col <- data.frame(
  Condition = sample_df$condition,
  Day = sample_df$day
)
rownames(annotation_col) <- sample_df$sample_id

# Reorder annotation table to match the matrix
annotation_col <- annotation_col[colnames(mat_scaled), ]


ann_colors <- list(
  Condition = c(Healthy = "#E0BA3E", PKD = "#529DDA"),
  Day = c(D12 = "#b3b3b3", D16 = "#4d4d4d")
)
library(pheatmap)

pheatmap(
  mat_scaled,
  color = colorRampPalette(c("blue", "white", "red"))(100),
  cluster_rows = TRUE,
  cluster_cols = FALSE,   # keep your custom column order
  annotation_col = annotation_col,
  annotation_colors = ann_colors,
  fontsize = 10,
  border_color = NA,
  main = "Top 50 DEGs (Healthy vs PKD, by Day)",
  show_rownames = TRUE,
  show_colnames = FALSE,
  
)